MSKIDS analysis

This report examines the effect of Multiple Sclerosis (MS) on brain volumes in a pediatric sample. The data used in this analysis consists of a harmonized dataset of volumetric measurements of 145 brain regionm and covariates such as age, sex and MS status.

Multiple linear regression models were run to investigate the relationship between MS and brain volumes. The models were run separately for each brain region of interest (ROI) and the results were reported for ROIs that showed significant associations with MS.

library(tidyverse)
library(broom)
library(shiny)

# load the data
df <- read.csv('data/harmonized_data_PNC+MS.csv', stringsAsFactors = TRUE) %>% 
  mutate(MS = as.factor(MS)) %>% 
  mutate_at(vars(starts_with('X')), ~ . *0.001) %>% # convert mm^3 to cm^3
  rename(ICV = X702,
         Sex = sex, 
         Age = age)

# capture roi indices as vector for functions
ROI_INDICES <- df %>% 
  select(starts_with("X")) %>% 
  colnames() 

# load the ROI dictionary
dict_df <- readxl::read_excel('data/MUSE_ROI_Dict.xlsx') %>% 
  filter(ROI_NAME == 'ICV' | ROI_INDEX < 300) %>% 
  select(ROI_INDEX, TISSUE_SEG, ROI_NAME) %>% 
  mutate(ROI_INDEX = paste0('X', ROI_INDEX),
         TISSUE_SEG = recode(TISSUE_SEG, `GM+WM+VN+BS+cCSF` = 'ICV')) %>% 
  filter(ROI_INDEX %in% ROI_INDICES)

Sample size

# print cell sizes and total sample size
df %>% 
  count(Sex, MS) %>% 
  bind_rows(summarize(., n = sum(n)))
##      Sex   MS    n
## 1 FEMALE    0  696
## 2 FEMALE    1   50
## 3   MALE    0  590
## 4   MALE    1   17
## 5   <NA> <NA> 1353

Models

Multiple regression models were run to investigate the relationship between MS and brain volumes. The models were run separately for each brain region of interest (ROI) and the results were reported for ROIs that showed significant associations with MS. The models controlled for sex, age, age squared and intracranial volume (ICV). Additionally, interactions between MS and sex were also included in the model. The results of these models are reported and visualized using box plots to help interpret the findings.

The analysis shows that MS is associated with specific brain volume changes in pediatric population.

Model notation:

\[\begin{align} ROI &= \beta_0 + \beta_1*Sex + \beta_2*Age + \beta_3*Age^2 + \beta_4*Sex*Age + \beta_5*MS*Sex + \beta_6*ICV + \epsilon \end{align}\]

Model R formula:

ROI ~ Sex + Age + Age^2 + Sex*Age + MS*Sex + ICV

# No MS-sex interation Model: ROI ~ Sex + Age + Age^2 + Sex\*Age + MS + ICV
run_model <- function(outcome){
  #' run model for each ROI
  f <- as.formula(sprintf("%s ~ Sex + Age + Age^2 + Sex*Age + MS + ICV", outcome))
  res <- lm(f, data = df) 
}

# run models searately
models <- ROI_INDICES %>% 
  purrr::map(run_model) %>% 
  purrr::map(tidy) %>% 
  setNames(ROI_INDICES) %>% 
  bind_rows(.id = 'ROI_INDEX') %>%
  left_join(dict_df, by = c('ROI_INDEX')) %>% # join with ROI dictionary
  select(ROI_INDEX, ROI_NAME, TISSUE_SEG, everything())

# use FDR correction
models %>% 
  filter(term == 'MS1') %>% 
  mutate(p.value = p.adjust(p.value, method='fdr')) %>% 
  filter(p.value < 0.05)
run_model <- function(outcome){
   #' run model for one ROI
  f <- as.formula(sprintf("%s ~ Sex + Age + Age^2 + Sex*Age + MS*Sex + ICV", outcome))
  res <- lm(f, data = df)
}

# run models searately for each ROI
models <- ROI_INDICES %>% 
  purrr::map(run_model) %>% 
  purrr::map(tidy) %>% 
  setNames(ROI_INDICES) %>% 
  bind_rows(.id = 'ROI_INDEX') %>% 
  left_join(dict_df, by = c('ROI_INDEX')) %>% 
  select(ROI_INDEX, ROI_NAME, TISSUE_SEG, everything())

# select effects/terms of interest and adjust p values. keep significant effects
sig_effects <- models %>% 
  filter(term %in% c('SexMALE:MS1', 'MS1')) %>% 
  mutate(p.value = p.adjust(p.value, method='fdr')) %>% 
  filter(p.value < 0.05) %>% 
  arrange(by = TISSUE_SEG)

Significant main effects: MS

With healthy controls as the reference group, the main effect (in \(cm^3\)) of having MS is shown in the estimate column for each significant ROI.

sig_effects %>% 
  filter(term == 'MS1')
## # A tibble: 9 × 8
##   ROI_INDEX ROI_NAME              TISSU…¹ term  estim…² std.e…³ stati…⁴  p.value
##   <chr>     <chr>                 <chr>   <chr>   <dbl>   <dbl>   <dbl>    <dbl>
## 1 X59       Right Thalamus Proper GM      MS1    -0.659  0.0675   -9.77 1.14e-19
## 2 X60       Left Thalamus Proper  GM      MS1    -0.680  0.0691   -9.85 1.10e-19
## 3 X107      Left AnG   angular g… GM      MS1    -0.778  0.207    -3.77 6.21e- 3
## 4 X121      Left FRP   frontal p… GM      MS1    -0.228  0.0730   -3.12 4.05e- 2
## 5 X122      Right FuG   fusiform… GM      MS1    -0.452  0.142    -3.19 3.80e- 2
## 6 X139      Left MCgG  middle ci… GM      MS1    -0.388  0.0936   -4.15 1.49e- 3
## 7 X4        3rd Ventricle         VN      MS1     0.153  0.0312    4.89 6.50e- 5
## 8 X81       frontal lobe WM right WM      MS1     2.33   0.709     3.29 3.02e- 2
## 9 X82       frontal lobe WM left  WM      MS1     3.10   0.699     4.44 4.68e- 4
## # … with abbreviated variable names ¹​TISSUE_SEG, ²​estimate, ³​std.error,
## #   ⁴​statistic

Results show that across Grey Matter (GM) regions, MS patients show decreased volumes relative to controls; whereas in the \(3^{rd}\) ventricle and the left and right frontal lobes, MS patients had higher volume than controls.

Significant interaction effects

With MS females as the reference group, the interaction effect (in \(cm^3\)) of having MS and being male is shown in the estimate column for each significant ROI. These effects are independent of the main effect of sex.

sig_effects %>% 
  filter(term == 'SexMALE:MS1')
## # A tibble: 4 × 8
##   ROI_INDEX ROI_NAME              TISSUE…¹ term  estim…² std.e…³ stati…⁴ p.value
##   <chr>     <chr>                 <chr>    <chr>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 X59       Right Thalamus Proper GM       SexM…   0.812  0.132     6.17 6.47e-8
## 2 X60       Left Thalamus Proper  GM       SexM…   0.893  0.135     6.63 4.81e-9
## 3 X49       Right Inf Lat Vent    VN       SexM…   0.121  0.0324    3.73 6.39e-3
## 4 X82       frontal lobe WM left  WM       SexM…  -4.28   1.36     -3.14 4.05e-2
## # … with abbreviated variable names ¹​TISSUE_SEG, ²​estimate, ³​std.error,
## #   ⁴​statistic

Results that show that MS males have higher volumes than females in right and left Thalamus and right inferior lateral ventricle. On the other hand MS males exhibit reduced volumes in the left frontal lobe than MS females.

Visualization

The means for the ROIs with a significant main effect (of MS status) and/or interaction (of MS status with sex) are plotted below by age and MS status.

plot_means <- function(roi, data=df) {
  #' make a box plot for a given roi
  roi_name <- dict_df %>% 
    filter(ROI_INDEX == roi) %>%
    pull(ROI_NAME)
  
  sig_effects <- sig_effects %>% 
    filter(ROI_INDEX == roi) %>% 
    pull(term) %>% 
    recode("MS1" = "main effect", "SexMALE:MS1" = "interaction")
  
  title <- ggtitle(sprintf("Significant effects of MS: %s", knitr::combine_words(sig_effects)))
  
  data %>% 
    mutate(MS=fct_recode(MS, Yes = "1", No = "0")) %>% 
    ggplot(aes_string(y=roi, x='MS', fill='Sex')) + 
    geom_boxplot() + 
    ylab(sprintf("%s (cm3)", roi_name)) +
    theme_bw() + title
    
}

## make a boxplot for ROIs with significant effects
sig_rois <- sig_effects %>% 
  pull(ROI_INDEX) %>% 
  unique()

plots <- sig_rois %>% 
  purrr::map(~ plot_means(.x), data=df) %>% 
  setNames(sig_rois)

Volumes

X59

X60

X107

X121

X122

X139

X4

X49

X81

X82

Volumes minus Age and ICV effect

# regress out nuisance effects from roi
reg_out_roi <- function(roi){
  #' run model for each ROI
  model <- lm(roi ~ Age + Age^2 + ICV, data = df) 
  model$residuals
}

reg_out_df <- df %>% 
   mutate_at(vars(starts_with('X')), reg_out_roi)
  
plots_reg_out <- sig_rois %>% 
  purrr::map(plot_means, data=reg_out_df) %>% 
  setNames(sig_rois)

X59

X60

X107

X121

X122

X139

X4

X49

X81

X82

Distribution of control variables

Age

df$Age %>% summary() %>% as.matrix() %>% t() %>% as.data.frame()
##   Min.  1st Qu.   Median     Mean  3rd Qu. Max.
## 1 5.69 12.16667 15.58333 15.36221 18.33333 29.4
df %>% 
   mutate(MS=fct_recode(MS, Yes = "1", No = "0")) %>% 
  ggplot(aes(y=Age, fill=Sex, x = MS)) + geom_boxplot() + theme_bw()

ICV

df$ICV %>% summary() %>% as.matrix() %>% t() %>% as.data.frame()
##       Min.  1st Qu. Median     Mean  3rd Qu.    Max.
## 1 1032.233 1317.867 1411.6 1419.895 1516.664 1872.69
df %>% 
  mutate(MS=fct_recode(MS, Yes = "1", No = "0")) %>% 
  ggplot(aes(y=ICV, fill=Sex, x = MS)) + geom_boxplot() + theme_bw() + ylab('ICV (cm3)')